## Setup locations
base = "01-cytograms"
here::i_am("01-cytograms.Rmd")
knitr::opts_chunk$set(fig.path = here::here("data", base, 'figures/'))
datadir = outputdir = here::here("data", base)
if(!dir.exists(outputdir)) dir.create(outputdir)

Clean and bin data

Here is a script to do the following; summarizing the workflow:

  1. Filter particles from the 3-minute level cytograms (i.e. remove min & max in all three dimensions).

  2. Transform fsc_small to diam.

  3. Combine 3-minute level cytograms into hourly cytograms.

  4. Bin this data to 40 equally sized bins in each axis (diam, pe, chl). Each bin value would be the sum (not the count) of the Qc (quantity of carbon) of the particles in that bin.

Here are all pilot cruises:

  • KM1508 (SCOPE 3)
  • KM1512 (SCOPE 5)
  • KM1513 (SCOPE 6)
  • KOK1515 (SCOPE 10)
  • KM1518 (SCOPE 11)
  • KM1601 (SCOPE 12)
  • KM1602 (SCOPE 13)
  • KOK1604 (SCOPE 15)
  • KOK1606 (SCOPE 16)
  • KOK1607 (SCOPE 17)
  • KOK1608 (SCOPE 18)
  • KOK1609 (SCOPE 19)
  • MGL1704
  • KM1708
  • KM1709
  • KM1712
  • KM1713
  • KM1717
  • KM1802
  • KM1805
  • FK180310-1
  • FK180310-2
  • KOK1801
  • KOK1803

Code for a single cruise

Here, we outline the actual steps to be taken on a server with a SLURM workload manager. Follow these steps (manual steps for now, but to be streamlined!):

Upload original files

Upload the .vct files (original, particle-level data files) to the server.

# for cruisename in KM1427 KM1510 KM1502 KM1601 KM1602 KOK1604 KOK1606 KOK1607 KOK1608
# for cruisename in KM1427 KM1510 KM1502 KM1601 KM1602 KOK1604 KOK1606 KOK1607 KOK1608 KOK1609
# for cruise_id in MGL1704 KM1708 KM1709 KM1712 KM1713 KM1717
# for cruise_id in KM1802 KM1805 FK180310-1 FK180310-2 KOK1801 KOK1803

# These are the three cruises that still needed to be uploaded.
for cruise_id in FK180310-1 FK180310-2 MGL1704
do
  ## Figure out how to convert between $cruisename and $cruise_id

    # cruise_id=FK180310-1
    # cruise_id=FK180310-2
    # cruise_id=KOK1803


  from=$cruise_id/orig
  to=discovery:scratchdir/output/flowmixapp/data/01-cytograms/$cruise_id/.
  rsync -av $from $to

done

for cruisename in KM1427 KM1510 KM1502 KM1601 KM1602 KOK1604 KOK1606 KOK1607 KOK1608 KOK1609

for cruise_id in MGL1704 KM1708 KM1709 KM1712 KM1713 KM1717

for cruise_id in KM1802 KM1805 FK180310-1 FK180310-2 KOK1801 KOK1803

for cruise_id in KM1513;
do
  mkdir $cruise_id
  mkdir $cruise_id/orig
done

Run script (on server)

Use a slurm script run-bin.slurm directly, or, use run_bin KM1508 which uses the helper function:

run_bin (){
    cruise_id=$1
    sbatch  --export=cruise_id=$cruise_id run-bin.slurm
    return 0
}

The SLURM script ~/scripts/flowmixapp/run-bin.slurm is here:

#!/bin/bash
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --mem-per-cpu=10gb
#SBATCH --cpus-per-task=6
#SBATCH --time=1-00:00:00
#SBATCH --export=none  # Ensures job gets a fresh login environment
#SBATCH --mail-user=robohyun66@gmail.com
#SBATCH --mail-type=ALL
#SBATCH --job-name=bin
#SBATCH --output=./logs/%A_%a.log
#SBATCH --error=./logs/%A_%a.err

# Set the session up to use R
module load gcc/8.3.0
module load openblas/0.3.8
module load r/3.6.3

Rscript bin.R \
    mc.cores=6\
    cruise_id=$cruise_id\

exit

which calls the script ~/scripts/flowmixapp/bin.R (which uses helpers from ~/scripts/flowmixapp/01-helpers.R), shown here:

(Note, this cytogram data is rescaled so that the diameter is between 0 and 8.)

## TODO: copy this script into here.

This saves the cytograms (along with the covariates X, and other things like lat, lon, and time) to an RDS file (e.g. KM1508-datobj.RDS), on the server.

Download the files

Download these files from the server to ~/repos/flowmixapp/data/01-cytograms/KM1508/data/., using this script:

all_cruise_id="KM1508 KM1512 KM1513 KOK1515 KM1518 KM1601 KM1602 KOK1604 KOK1606 KOK1607 KOK1608 KOK1609 MGL1704 KM1708 KM1709 KM1713 KM1717 KOK1801 KM1712 KM1713 KM1802 KM1805 KOK1803 FK180310-1 FK180310-2"

for cruise_id in all_cruise_id
  do
    echo $cruise_id
    from=sangwonh@discovery.usc.edu:scratchdir/output/flowmixapp/data/01-cytograms/$cruise_id/data/$cruise_id-datobj.RDS
    to=~/repos/flowmixapp/data/01-cytograms/$cruise_id/data
    mkdir -p $to
    rsync -auv $from $to
done

for cruise_id in all_cruise_id
do
    from=~/repos/flowmixapp/data/01-cytograms/$cruise_id/viz/*
      to=~/repos/flowmixapp/data/01-cytograms/$cruise_id/viz-no-wind/.
    mkdir -p $to
    cp $from $to
done

1d cytogram plots

(Assume the processed and binned data files, e.g. KM1508-datobj.Rdata, have been downloaded.)

We’ll load and plot this cytogram data, in several ways. First, produce 1d plots:

all_cruise_id = c(c("KM1508", "KM1512", "KM1513", "KOK1515", "KM1518"),
                  c("KM1601", "KM1602", "KOK1604", "KOK1606"),
                  c("KOK1607", "KOK1608", "KOK1609"),
                  c("MGL1704", "KM1708","KM1709", "KM1713", "KM1717",  "KOK1801"),
                  c("KM1712", "KM1713", "KM1802", "KM1805", "KOK1803", "FK180310-1", "FK180310-2"))

qctablist = list()
for(cruise_id in all_cruise_id){

  cat("### ", cruise_id, "\n")

  ## Load the binned 3d data
  filename = paste0(cruise_id, "-datobj.RDS")
  ybin_obj = readRDS(file = file.path(datadir, cruise_id, "data", filename))
  ylist = ybin_obj$ylist
  qclist = ybin_obj$biomass_list
  
  ## Make plots of total QC at each time point
  qc_over_time = qclist %>% lapply(sum) %>% unlist()
  qctab = tibble(time = names(ylist) %>% lubridate::as_datetime(),
                 qcsum = as.numeric(qc_over_time))
  qctablist[[cruise_id]] = qctab
  p = qctab %>%
    ggplot() +
    geom_line(aes(x = time, y = qcsum), col = rgb(0,0,0,0.2)) +
    geom_point(aes(x = time, y = qcsum), cex = .5) +
    ylab("Sum of QC over time") +
    ggtitle(cruise_id) +
    scale_x_datetime(date_breaks = "6 hour", labels = scales::date_format("%b %d - %H:%M")) 
  p = p + theme(axis.text.x = element_text(angle = 25, vjust = 1.0, hjust = 1.0))
  p = p + theme(legend.position="none")
  plot(p)

  ## Make lat/lon plots
  X = ybin_obj$X
  p = X %>% select(time, lat, lon) %>% 
    pivot_longer(-one_of("time")) %>%
    ggplot() + 
    facet_wrap(~name, scale="free_y", ncol=1) +
    geom_line(aes(x=time, y=value)) +
    scale_x_datetime(date_breaks = "6 hour", labels = scales::date_format("%b %d - %H:%M")) 
  p = p + theme(axis.text.x = element_text(angle = 25, vjust = 1.0, hjust = 1.0))
  p = p + theme(legend.position="none")
  plot(p)

  ## Make 1d cytogram plot
  y_qc_list = Map(function(y, qc){ as_tibble(y) %>% add_column(qc = qc) }, ylist, qclist)
  for(dimname in c("diam", "chl", "pe")){

    ## Summarize to 1d binned data
    y_qc_list_1d = y_qc_list %>% purrr::map(. %>% group_by(!!as.name(dimname)) %>% summarise(qc=sum(qc)))
    qclist_1d = y_qc_list_1d %>% purrr::map(.%>% dplyr::pull(qc))
    ylist_1d = y_qc_list_1d %>% purrr::map(.%>% dplyr::pull(!!as.name(dimname)))

    ## Scaling or not?
    for(scale_at_each_time in c(FALSE, TRUE)){
      if(scale_at_each_time) qclist_1d = qclist_1d %>% lapply(function(qc) qc/sum(qc))

      ## Make plots
      mytitle = paste0(cruise_id, " - ", dimname)
      if(scale_at_each_time) mytitle = paste0(mytitle, ", density at each time")
      p = flowmix::bin_plot_1d(ylist_1d, qclist_1d)
      p = p + ggtitle(mytitle)
      p = p + theme_minimal()
      p = p + scale_x_datetime(date_breaks = "6 hour", labels = scales::date_format("%b %d - %H:%M")) 
      p = p + theme(axis.text.x = element_text(angle = 25, vjust = 1.0, hjust = 1.0))
      p = p + theme(legend.position="none")
      plot(p)

      ## Save to file as well.
      filename = paste0(cruise_id, "-1d-", dimname, ".png")
      if(scale_at_each_time) filename = paste0(cruise_id, "-1d-", dimname, "-density.png")
      if(!file.exists(filename)){
        ggsave(file = file.path(datadir, cruise_id, "viz", filename),
               width = 10, height = 3, units = "in", dpi = 300, p)
      }
    }
  }
  cat("\n\n")
}

KM1508

KM1512

KM1513

KOK1515

KM1518

KM1601

KM1602

KOK1604

KOK1606

KOK1607

KOK1608

KOK1609

MGL1704

KM1708

KM1709

KM1713

KM1717

KOK1801

KM1712

KM1713

KM1802

KM1805

KOK1803

FK180310-1

FK180310-2

saveRDS(qctablist, file = file.path(outputdir, "qctablist.RDS"))

2d cytogram plots

Next, 2d plots made into videos:

(The png figures and mp4 video will be saved to e.g. data/01-cytogram/MGL1704/viz. These are too large to place in github; instead, they have been placed here: Dropbox link )

## Load data
for(cruise_id in all_cruise_id){

  cat('###', cruise_id,' \n')

  filename = paste0(cruise_id, "-datobj.RDS")
  ybin_obj = readRDS(file = file.path(datadir, cruise_id, "data", filename))
  ylist = ybin_obj$ylist
  qclist = ybin_obj$biomass_list
  dir.create(file.path(datadir, cruise_id))

  ## Create the series of 2d plots
  TT = length(ylist)
  coul <- colorRampPalette(RColorBrewer::brewer.pal(8, "RdYlBu"))(25) %>% rev()
  for(tt in 1:TT){

    ## See if file has already been made
    filename = paste0(tt, ".png")
    plotfile = file.path(datadir, cruise_id, "viz", filename)
    p = plot_2d_threepanels(ylist = ylist, countslist = qclist,
                            tt = tt, colours = coul, cruise_id = cruise_id)
    if(!file.exists(plotfile)){
      ggsave(file = plotfile, width = 12, height = 4, units = "in", dpi = 300, p)
    }
    if(tt == 1){
      oneplotfile = file.path(datadir, cruise_id, "viz", paste0(tt, ".jpeg"))
      ggsave(file = oneplotfile, width = 12, height = 4, units = "in", dpi = 300, p)
    }
  }

  ## Make into a video
  videofile = file.path(outputdir, cruise_id, "viz", paste0(cruise_id, "-data.mp4"))
  if(!file.exists(videofile)){
    setwd(file.path(datadir, cruise_id, "viz"))
    system(paste0("ffmpeg -y -framerate 3 -i %d.png ", cruise_id, "-data.mp4"))
  }

  ## Embed video (Not doing this anymore, because the video file size is too big.)
  if(FALSE){
    cat(paste0('<video width="1280" height="720" controls>  <source src="', videofile, '" type="video/mp4"> </video>'))
  }

  ## Instead, we'll just embed one frame
  cat(paste0("![](", file.path(".", 
                               "data",
                               "01-cytograms",
                               cruise_id,
                               "viz",
                               "1.jpeg"),
             ")"))
  cat('\n\n')
}

KM1508

KM1512

KM1513

KOK1515

KM1518

KM1601

KM1602

KOK1604

KOK1606

KOK1607

KOK1608

KOK1609

MGL1704

KM1708

KM1709

KM1713

KM1717

KOK1801

KM1712

KM1713

KM1802

KM1805

KOK1803

FK180310-1

FK180310-2

knitr::knit_exit()